Import of required packages and custom functions

In addition to the necessary packages, functions are necessary to calculate the final result of each game, the final amount of points for each team and also create a leaderboard according to the set of scores.

# Packages
library(tidyverse)
library(goalmodel)
library(worldfootballR)
library(regista)
library(janitor)
library(magrittr)
library(ggrepel)
library(ggtext)
library(jsonlite)
library(gt)
library(gtExtras)
library(MetBrewer)

# Functions
calcV <- function(hg, ag){
  return(hg > ag)
}
calcD <- function(hg, ag){
  return(hg < ag)
}
calcE <- function(hg, ag){
  return(hg == ag)
}
calcPTS <- function(hg, ag){
  return(ifelse(hg < ag, 0, ifelse(hg == ag, 1, 3)))
}
calcTAB <- function(games){
  home <- games %>%
    mutate(casa_V = calcV(hgoal, agoal),
           casa_E = calcE(hgoal, agoal),
           casa_D = calcD(hgoal, agoal),
           casa_PTS = calcPTS(hgoal,agoal)) %>%
    group_by(home) %>% summarise(casa_PTS = sum(casa_PTS),
                                 casa_J = length(home),
                                 casa_V = sum(casa_V),
                                 casa_E = sum(casa_E),
                                 casa_D = sum(casa_D),
                                 casa_GP = sum(as.numeric(hgoal)),
                                 casa_GS = sum(as.numeric(agoal)),
                                 casa_SG = sum(as.numeric(hgoal)) - sum(as.numeric(agoal))) %>%
    dplyr::rename(Time = home)
  
  away <- games %>%
    mutate(fora_V = calcV(agoal, hgoal),
           fora_E = calcE(agoal, hgoal),
           fora_D = calcD(agoal, hgoal),
           fora_PTS = calcPTS(agoal,hgoal)) %>%
    group_by(away) %>% summarise(fora_PTS = sum(fora_PTS),
                                 fora_J = length(away),
                                 fora_V = sum(fora_V),
                                 fora_E = sum(fora_E),
                                 fora_D = sum(fora_D),
                                 fora_GP = sum(as.numeric(agoal)),
                                 fora_GS = sum(as.numeric(hgoal)),
                                 fora_SG = sum(as.numeric(agoal)) - sum(as.numeric(hgoal))) %>%
    dplyr::rename(Time = away)
  
  total <- inner_join(home, away, by = 'Time') %>%
    mutate(PTS = casa_PTS + fora_PTS,
           J = casa_J + fora_J,
           V = casa_V + fora_V,
           E = casa_E + fora_E,
           D = casa_D + fora_D,
           GP = casa_GP + fora_GP,
           GS = casa_GS + fora_GS,
           SG = casa_SG + fora_SG) %>%
    select(Time, PTS, J, V, E, D, GP, GS, SG) %>%
    arrange(desc(PTS), desc(V), desc(SG), desc(GP)) %>%
    mutate(Pos = row_number()) %>%
    relocate(Pos) %>%
    mutate(AP = round(PTS / (J * 3) * 100, digits = 1))
  
  return(total)
}

current_date <- strftime(Sys.Date(), format = "%d-%m-%Y")
camcorder::gg_record(
  dir = file.path(here::here("camcorder_outputs")),
  device = "png",
  width = 18,
  height = 10,
  dpi = 300)
sysfonts::font_add_google(name = "IBM Plex Sans", family = "IBM")
showtext::showtext_auto()
showtext::showtext_opts(dpi = 300)
font <- "IBM"

Extraction and manipulation of data necessary for the model

The data used comes from the FBRef website and to try to increase the effectiveness of the model we will collect all the scores from the Brazilian Championship games since 2014. The games already played in 2023 will obviously be integrated into the model training part, which will then be applied to the games still to be played.

When this model was originally drawn up, on August 17th, the Brazilian Championship had just reached the end of its first round, with Botafogo as the undisputed leader. With 47 points in the 19 matches played in the first round, Botafogo equaled Corinthians’ performance in the first half of 2017 in points. The first tiebreaker criterion according to the championship regulations, the number of victories, gave Botafogo the best first round in history: they had 15 wins against 14 for Corinthians in 2017, who also made history of their own by going undefeated in their first 19 games.

folder <- "C:/R/Simuladores BR 2023/"
# Empty dataframe to store all final tables
montecarlo_tabelas <- setNames(data.frame(matrix(ncol = 12, nrow = 0)),
                               c('Pos', 'Time', 'PTS', 'J', 'V', 'E',
                                 'D', 'GP', 'GS', 'SG', 'AP', 'sim'))
# Dataframe list
montecarlo_tabelas_df <- list()

# Empty dataframe to store all matches
montecarlo_jogos <- setNames(data.frame(matrix(ncol = 10, nrow = 0)),
                             c('year', 'home', 'hgoal', 'agoal', 'away',
                               'p1', 'pX', 'p2', 'hxg', 'axg'))
# Dataframe list
montecarlo_jogos_df <- list()

# Extracting data from the 2023 Brazilian Championship from FBRef
data_2023 <- fb_match_results(country = "BRA",
                              gender = "M",
                              season_end_year = 2023,
                              tier = "1st") %>%
  clean_names() %>% factor_teams(c("home", "away")) %>% 
  rename(hgoal = home_goals, agoal = away_goals) %>% 
  select('date', 'home', 'away', 'hgoal', 'agoal')

# Teams list
times <- unique(data_2023$home)

# Extracting data from other editions available on FBRef
# These games will serve as model training
train_data <- fb_match_results(country = "BRA",
                               gender = "M",
                               season_end_year = c(2014,2015,2016,
                                                   2017,2018,2019,
                                                   2020,2021,2022),
                               tier = "1st") %>%
  clean_names() %>% factor_teams(c("home", "away")) %>% 
  rename(hgoal = home_goals, agoal = away_goals) %>% 
  select('date', 'home', 'away', 'hgoal', 'agoal')

# Separating the games already played in 2023
# These games will be part of the model training
played_2023 <- data_2023 %>% filter(!is.na(hgoal) & !is.na(agoal))
train_data <- rbind(train_data, played_2023)

# Separating the games not yet played in 2023
# These games will be the model test set
test_data <- data_2023 %>% filter(is.na(hgoal) & is.na(agoal))

# Creating a dataframe for all games since 2014
full_data <- rbind(train_data, test_data)

Model creation and visualization

In this model view, the summary will show all teams present in the data provided to the model. This means that all clubs participating in at least one edition of the Brazilian Championship since 2014 will be present.

pesos <- weights_dc(train_data$date, xi = 0.003)
model <- goalmodel(goals1 = train_data$hgoal,
                   goals2 = train_data$agoal,
                   team1 = train_data$home,
                   team2 = train_data$away,
                   dc = TRUE,
                   rs = TRUE,
                   model = 'poisson',
                   weights = pesos)
summary(model)
## Model sucsessfully fitted in 17.22 seconds
## 
## Number of matches          3670 
## Number of teams              34 
## 
## Model                     Poisson 
## 
## Log Likelihood            -975.96 
## AIC                        2091.92 
## R-squared                  0.12 
## Parameters (estimated)       70 
## Parameters (fixed)            0 
## 
## Team                      Attack   Defense
## América (MG)              0.11    -0.19 
## Ath Paranaense             0.24     0.02 
## Atl Goianiense            -0.00    -0.04 
## Atlético Mineiro          0.12     0.34 
## Avaí                     -0.11    -0.16 
## Bahia                      0.10    -0.03 
## Botafogo (RJ)              0.18     0.30 
## Bragantino                 0.23     0.07 
## Ceará                    -0.08     0.14 
## Chapecoense               -0.27    -0.21 
## Corinthians                0.12     0.10 
## Coritiba                   0.12    -0.34 
## Criciúma                 -0.29    -0.20 
## Cruzeiro                  -0.16     0.37 
## CSA                       -0.32    -0.11 
## Cuiabá                   -0.06     0.14 
## Figueirense               -0.26    -0.05 
## Flamengo                   0.34     0.05 
## Fluminense                 0.27     0.04 
## Fortaleza                  0.10     0.18 
## Goiás                    -0.06     0.04 
## Grêmio                    0.39    -0.06 
## Internacional              0.05     0.19 
## Joinville                 -0.44    -0.07 
## Juventude                 -0.16    -0.19 
## Palmeiras                  0.31     0.34 
## Paraná                   -0.67    -0.04 
## Ponte Preta               -0.04    -0.07 
## Santa Cruz                 0.14    -0.41 
## Santos                     0.11    -0.11 
## São Paulo                 0.15     0.10 
## Sport Recife              -0.34     0.18 
## Vasco da Gama              0.11    -0.11 
## Vitória                   0.06    -0.22 
## -------
## Intercept                 -0.12 
## Home field advantage       0.38 
## Dixon-Coles adj. (rho)    -0.02 
## Rue-Salvesen adj. (gamma) -0.38

Plot of variables for each team in the 2023 Brazilian Championship

coef <- as.data.frame(model[["parameters"]][["attack"]])
coef$Def <- model[["parameters"]][["defense"]]
colnames(coef)[1] <- 'Att'
coef$Ovr <- coef$Att + coef$Def
coef <- coef[,c(3,1,2)]
coef$Time <- row.names(coef)
coef <- coef %>% filter(`Time` %in% times)

coefplot <- coef %>% ggplot(aes(x = Def, y = Att)) +
  geom_point(shape=21, stroke=0, fill="orange", color = "black", size=8) +
  #geom_text_repel(aes(label = team)) +
  #geom_text(aes(label = Time), position = position_nudge(y = -0.06)) +
  geom_text(aes(label = Time), hjust = -0.2, size = 5) +
  theme_minimal(base_size = 20) +
  labs(title = "Estimativa de parâmetros dos times",
       y = "Ataque",
       x = "Defesa")

print(coefplot)
ggsave(paste(folder,
             current_date,
             ' - Coeficientes.png',
             sep = ''),
       plot = coefplot)
## Saving 7 x 5 in image

Defining the number of simulations and executing

Each iteration produces a final league table, after all clubs have played their 38 matches, and a list of the 380 scores from the games between the teams. All these tables and game lists are grouped into a single set, for reasons that will be explained below.

# Number of iterations
runs = 10000

for(n in 1:runs){
  run <- test_data

  for(i in 1:nrow(run)){
    plac <- predict_goals(
      model,
      team1 = run$home[i],
      team2 = run$away[i],
      return_df = TRUE,
      maxgoal = 15)
    plac$res <- paste(plac$goals1,plac$goals2,sep="x")
    plac <- plac[c(1,2,5,6)]
    plac$probability <- ifelse(plac$probability < 0,
                               abs(plac$probability), plac$probability)
    
    match <- sample(plac$res, 1, prob = plac$probability)
    match <- data.frame(test_data$date[i], test_data$home[i],
                        test_data$away[i], match)
    colnames(match) <- c('date', 'home', 'away', 'x')
    match[c('hgoal', 'agoal')] <- str_split_fixed(match$x, 'x', 2)
    match$x <- 'x'
    match <- match[c(1,2,5,6,3)]
    run <- rbind(run,match)
  }
  
  run <- run %>% drop_na(hgoal)
  simmed <- run %>% select(1,2,3,4,5)
  total <- rbind(played_2023, simmed)
  
  classificacao_casa <- total %>%
    mutate(casa_V = calcV(hgoal, agoal),
           casa_E = calcE(hgoal, agoal),
           casa_D = calcD(hgoal, agoal),
           casa_PTS = calcPTS(hgoal,agoal)) %>%
    group_by(home) %>% summarise(casa_PTS = sum(casa_PTS),
                                 casa_J = length(home),
                                 casa_V = sum(casa_V),
                                 casa_E = sum(casa_E),
                                 casa_D = sum(casa_D),
                                 casa_GP = sum(as.numeric(hgoal)),
                                 casa_GS = sum(as.numeric(agoal)),
                                 casa_SG = sum(as.numeric(hgoal)) - sum(as.numeric(agoal))) %>%
    dplyr::rename(Time = home)
  
  classificacao_fora <- total %>%
    mutate(fora_V = calcV(agoal, hgoal),
           fora_E = calcE(agoal, hgoal),
           fora_D = calcD(agoal, hgoal),
           fora_PTS = calcPTS(agoal,hgoal)) %>%
    group_by(away) %>% summarise(fora_PTS = sum(fora_PTS),
                                 fora_J = length(away),
                                 fora_V = sum(fora_V),
                                 fora_E = sum(fora_E),
                                 fora_D = sum(fora_D),
                                 fora_GP = sum(as.numeric(agoal)),
                                 fora_GS = sum(as.numeric(hgoal)),
                                 fora_SG = sum(as.numeric(agoal)) - sum(as.numeric(hgoal))) %>%
    dplyr::rename(Time = away)
  
  classificacao_final <- inner_join(classificacao_casa, classificacao_fora, by = 'Time') %>%
    mutate(PTS = casa_PTS + fora_PTS,
           J = casa_J + fora_J,
           V = casa_V + fora_V,
           E = casa_E + fora_E,
           D = casa_D + fora_D,
           GP = casa_GP + fora_GP,
           GS = casa_GS + fora_GS,
           SG = casa_SG + fora_SG) %>%
    select(Time, PTS, J, V, E, D, GP, GS, SG) %>%
    arrange(desc(PTS), desc(V), desc(SG), desc(GP)) %>%
    mutate(Pos = row_number()) %>%
    relocate(Pos) %>%
    mutate(AP = round(PTS / (J * 3) * 100, digits = 1)) %>%
    mutate(sim = n)
  
  montecarlo_tabelas <- do.call(rbind, list(montecarlo_tabelas, classificacao_final))
  montecarlo_tabelas_df <- c(montecarlo_tabelas_df, list(classificacao_final))
  run <- run %>% mutate(sim = n)
  montecarlo_jogos <- do.call(rbind, list(montecarlo_jogos, run))
  montecarlo_jogos_df <- c(montecarlo_jogos_df, list(run))
}

Creating an average dataframe across tables

Using the dataframe list of final standings previously created, an average dataframe will be generated that will allow application of Euclidean distance methods. After this, each final table is compared to the average dataframe and a Euclidean distance is calculated. The greater this distance, the greater the difference between the iteration and the average result.

After all iterations are evaluated, a distances_df dataframe is created listing the number of each iteration and its Euclidean distance from the average dataframe. This distances_df dataframe also has a probability column. The closer to the average dataframe, the higher the probability value. This column can then be used in a sample function with weight that allows us to draw an iteration X and check how the final table of that iteration turned out.

# Calculate the average dataframe
if (!all(sapply(montecarlo_tabelas_df, function(df) identical(dim(df), dim(montecarlo_tabelas_df[[1]]))))) {
  stop("All dataframes must have the same dimensions.")
}
preprocess_dataframe <- function(df) {
  df_numeric <- as.data.frame(lapply(df, function(col) as.numeric(as.character(col))))
  return(df_numeric)
}
list_of_dataframes_numeric <- lapply(montecarlo_tabelas_df, preprocess_dataframe)
all_data <- array(unlist(list_of_dataframes_numeric), dim = c(nrow(list_of_dataframes_numeric[[1]]), ncol(list_of_dataframes_numeric[[1]]), length(list_of_dataframes_numeric)))
average_dataframe <- apply(all_data, c(1, 2), mean)
distances <- apply(all_data, 3, function(df) dist(rbind(df, average_dataframe))[1])
distances_vector <- unlist(distances)
distances_df <- data.frame(Index = seq_along(distances_vector), Distance = distances_vector)
distances_df <- distances_df %>%
  arrange(desc(Distance)) %>%
  mutate(prob = Distance / sum(Distance))
distances_df$prob <- distances_df$prob / sum(distances_df$prob)

head(select(distances_df, -2), n = 10)
##    Index         prob
## 1   1986 0.0003327426
## 2   3312 0.0003013899
## 3   2687 0.0002941617
## 4   8010 0.0002938568
## 5   4458 0.0002890341
## 6    259 0.0002879055
## 7   6037 0.0002840310
## 8   1128 0.0002830082
## 9    470 0.0002820355
## 10  1797 0.0002798233

Assembly of the final Montecarlo method table

Unlike the previous stage, where the focus was only on mathematically analyzing how close the dataframes were to each other, the objective here is to create the final championship table. In short, each club’s total points, goals, wins, draws and losses are divided by the number of iterations and organized according to the competition’s tiebreaker criteria.

# Montar classificação média
classificacao_media <- montecarlo_tabelas %>% group_by(Time) %>%
  summarise(PTS = round(mean(PTS)),
            J = round(mean(J)),
            V = round(mean(V)),
            E = round(mean(E)),
            D = round(mean(D)),
            GP = round(mean(GP)),
            GS = round(mean(GS)),
            SG = round(mean(SG))) %>%
  arrange(desc(PTS), desc(V), desc(SG), desc(GP)) %>%
  mutate(Pos = row_number()) %>%
  relocate(Pos)

head(classificacao_media, n = 8)
## # A tibble: 8 × 10
##     Pos Time               PTS     J     V     E     D    GP    GS    SG
##   <int> <fct>            <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1     1 Botafogo (RJ)       74    38    22     8     8    57    28    29
## 2     2 Palmeiras           69    38    19    11     8    60    30    30
## 3     3 Grêmio              64    38    19     8    11    61    49    12
## 4     4 Flamengo            64    38    18    10    10    56    45    11
## 5     5 Bragantino          64    38    17    12     8    52    37    15
## 6     6 Atlético Mineiro    61    38    17    11    10    46    30    15
## 7     7 Fluminense          60    38    17     8    13    52    45     7
## 8     8 Ath Paranaense      58    38    16    10    12    54    45     9

Correction and standardization of team names

Next, we will use another website to obtain the logos for each team. Some teams are named differently in both sources, so we need to make some changes to the data.

classificacao_media$Time <- as.character(classificacao_media$Time)
classificacao_media[classificacao_media == 'Ath Paranaense'] <- 'Athletico'
classificacao_media[classificacao_media == 'Botafogo (RJ)'] <- 'Botafogo'
classificacao_media[classificacao_media == 'Bragantino'] <- 'RB Bragantino'

Probabilities of finishing by club and position

Again using the previously established list of dataframes, we will create a visualization showing in percentage how many times each club finished in each of the 20 positions. This can therefore be considered the probability of each club finishing in each position. At least according to the prediction capacity of our model, with the result to be confirmed at the end of the championship.

resumo <- montecarlo_tabelas %>%
  group_by(Pos, Time) %>%
  tally(name = "Total") %>%
  mutate(prob = Total / runs)

resumo$Time <- as.character(resumo$Time)
resumo[resumo == 'Ath Paranaense'] <- 'Athletico'
resumo[resumo == 'Botafogo (RJ)'] <- 'Botafogo'
resumo[resumo == 'Bragantino'] <- 'RB Bragantino'

resumoplot <- resumo %>%
  ggplot(aes(x = Pos,
             y = fct_reorder(Time,-Pos),
             fill = prob)) +
  geom_tile() +
  scale_x_continuous(breaks = seq(0, 24, 1),
                     expand = c(0.03, 0)) +
  scale_fill_continuous(low = "white", high = "#72aeb6") +
  geom_text(aes(label = paste0(prob * 100, "%"),
                size = 2,
                family = font)) +
  labs(title = 'Probabilidade por posição no Campeonato Brasileiro 2023',
       subtitle = gt::md(glue::glue("Simulado em {current_date}")),
       y = "",
       x = "Posição") +
  theme(plot.title = element_text(family = font, size = 30, face = "bold"),
        panel.grid.major = element_blank(),
        panel.background = element_blank(),
        legend.position = "none",
        axis.text.y = element_text(size = 14, family = font),
        axis.ticks = element_blank(),
        plot.subtitle = element_text(size = 16),
        axis.text.x = element_text(size = 12),
        axis.title.x = element_text(size = 16, family = font),
        plot.title.position = "plot",
        plot.caption = element_text(size = 12)) 

qtd_times <- classificacao_media %>% pull(Time) %>% n_distinct()
print(resumoplot)
ggsave(paste(folder, current_date, ' - Posições.png', sep = ''),
       plot = resumoplot, width = 18, height = 10)

Zone probabilities by club

resumo_zonas <- resumo %>%
  mutate(Zona = case_when(
    Pos >= 1 & Pos <= 6 ~ "Libertadores",
    Pos >= 7 & Pos <= 12 ~ "Sulamericana",
    Pos >= 17 & Pos <= 20 ~ "Rebaixamento",
    TRUE ~ "Outro")) %>%
  group_by(Zona, Time) %>%
  summarise(Count = sum(Total)) %>%
  mutate(prob = Count / runs * 100) %>%
  arrange(desc(Count))

resumo_lib <- resumo_zonas %>%
  subset(Zona == "Libertadores") %>%
  select(2, 4)

resumo_sula <- resumo_zonas %>%
  subset(Zona == "Sulamericana") %>%
  select(2, 4)

resumo_reb <- resumo_zonas %>%
  subset(Zona == "Rebaixamento") %>%
  select(2, 4)

Libertadores probabilities

resumo_lib_plot <- resumo_lib %>%
  arrange(desc(prob)) %>%
  ggplot(aes(x = Zona,
             y = fct_reorder(Time,prob),
             fill = prob)) +
  geom_tile() +
  scale_fill_continuous(low = "pink", high = "#72aeb6") +
  geom_text(aes(label = paste0(prob, "%"),
                size = 2,
                family = font)) +
  labs(title = 'Chances de Libertadores',
       y = "",
       x = "") +
  theme(plot.title = element_text(family = font, size = 30, face = "bold"),
        panel.grid.major = element_blank(),
        panel.background = element_blank(),
        legend.position = "none",
        axis.text.y = element_text(size = 14, family = font),
        axis.ticks = element_blank(),
        plot.subtitle = element_text(size = 16),
        axis.text.x = element_text(size = 12),
        axis.title.x = element_text(size = 16, family = font),
        plot.title.position = "plot",
        plot.caption = element_text(size = 12),
        plot.margin = margin(10, 1000, 1, 10, "pt"))

head(resumo_lib, n = 20)
## # A tibble: 16 × 3
## # Groups:   Zona [1]
##    Zona         Time              prob
##    <chr>        <chr>            <dbl>
##  1 Libertadores Botafogo         99.8 
##  2 Libertadores Palmeiras        97.0 
##  3 Libertadores Grêmio           81.3 
##  4 Libertadores RB Bragantino    76.4 
##  5 Libertadores Flamengo         76.0 
##  6 Libertadores Atlético Mineiro 60.9 
##  7 Libertadores Fluminense       45.4 
##  8 Libertadores Athletico        33.8 
##  9 Libertadores Fortaleza        24.2 
## 10 Libertadores São Paulo         2.95
## 11 Libertadores Corinthians       0.79
## 12 Libertadores Cuiabá            0.69
## 13 Libertadores Internacional     0.36
## 14 Libertadores Cruzeiro          0.23
## 15 Libertadores Goiás             0.01
## 16 Libertadores Vasco da Gama     0.01

Sul-Americana probabilities

resumo_sula_plot <- resumo_sula %>%
  arrange(desc(prob)) %>%
  ggplot(aes(x = Zona,
             y = fct_reorder(Time,prob),
             fill = prob)) +
  geom_tile() +
  scale_fill_continuous(low = "pink", high = "#72aeb6") +
  geom_text(aes(label = paste0(prob, "%"),
                size = 2,
                family = font)) +
  labs(title = 'Chances de Sulamericana',
       y = "",
       x = "") +
  theme(plot.title = element_text(family = font, size = 30, face = "bold"),
        panel.grid.major = element_blank(),
        panel.background = element_blank(),
        legend.position = "none",
        axis.text.y = element_text(size = 14, family = font),
        axis.ticks = element_blank(),
        plot.subtitle = element_text(size = 16),
        axis.text.x = element_text(size = 12),
        axis.title.x = element_text(size = 16, family = font),
        plot.title.position = "plot",
        plot.caption = element_text(size = 12),
        plot.margin = margin(10, 1000, 1, 10, "pt"))

head(resumo_sula, n = 20)
## # A tibble: 19 × 3
## # Groups:   Zona [1]
##    Zona         Time              prob
##    <chr>        <chr>            <dbl>
##  1 Sulamericana Fortaleza        74.1 
##  2 Sulamericana São Paulo        73.0 
##  3 Sulamericana Athletico        65.1 
##  4 Sulamericana Cuiabá           56.1 
##  5 Sulamericana Corinthians      55.4 
##  6 Sulamericana Fluminense       54.1 
##  7 Sulamericana Internacional    47.7 
##  8 Sulamericana Atlético Mineiro 39.1 
##  9 Sulamericana Cruzeiro         38.7 
## 10 Sulamericana Flamengo         23.9 
## 11 Sulamericana RB Bragantino    23.5 
## 12 Sulamericana Grêmio           18.7 
## 13 Sulamericana Goiás             9.49
## 14 Sulamericana Vasco da Gama     6.38
## 15 Sulamericana Santos            6.05
## 16 Sulamericana Bahia             5.67
## 17 Sulamericana Palmeiras         2.96
## 18 Sulamericana Botafogo          0.2 
## 19 Sulamericana América (MG)      0.02

Relegation probabilities

resumo_reb_plot <- resumo_reb %>%
  arrange(desc(prob)) %>%
  ggplot(aes(x = Zona,
             y = fct_reorder(Time,prob),
             fill = prob)) +
  geom_tile() +
  scale_fill_continuous(low = "pink", high = "#72aeb6") +
  geom_text(aes(label = paste0(prob, "%"),
                size = 2,
                family = font)) +
  labs(title = 'Chances de Rebaixamento',
       y = "",
       x = "") +
  theme(plot.title = element_text(family = font, size = 30, face = "bold"),
        panel.grid.major = element_blank(),
        panel.background = element_blank(),
        legend.position = "none",
        axis.text.y = element_text(size = 14, family = font),
        axis.ticks = element_blank(),
        plot.subtitle = element_text(size = 16),
        axis.text.x = element_text(size = 12),
        axis.title.x = element_text(size = 16, family = font),
        plot.title.position = "plot",
        plot.caption = element_text(size = 12),
        plot.margin = margin(10, 1000, 1, 10, "pt"))

head(resumo_reb, n = 20)
## # A tibble: 13 × 3
## # Groups:   Zona [1]
##    Zona         Time           prob
##    <chr>        <chr>         <dbl>
##  1 Rebaixamento Coritiba      99.5 
##  2 Rebaixamento América (MG)  98.1 
##  3 Rebaixamento Bahia         51.3 
##  4 Rebaixamento Vasco da Gama 47.7 
##  5 Rebaixamento Santos        46.6 
##  6 Rebaixamento Goiás         35.3 
##  7 Rebaixamento Cruzeiro       7.61
##  8 Rebaixamento Internacional  5.29
##  9 Rebaixamento Corinthians    4.15
## 10 Rebaixamento Cuiabá         3.23
## 11 Rebaixamento São Paulo      1.23
## 12 Rebaixamento Athletico      0.01
## 13 Rebaixamento Fluminense     0.01

Starting to create the visual final table

# Simple function to extract each team's logo
logo_image <- function(team_id, width = 20) {
  glue::glue("https://images.fotmob.com/image_resources/logo/teamlogo/{team_id}.png")
}

# Campeonato Brasileiro logo
league_logo <- "https://images.fotmob.com/image_resources/logo/leaguelogo/268.png"

# Creation of an auxiliary table with the name of each team
# and a link to the respective logo
team_ids <- fotmob_get_league_tables(league_id = 268) %>%
  filter(table_idx == 1:20) %>% slice(1:20)
team_ids <- team_ids %>%
  mutate(image_link = logo_image(team_id = unique(team_ids$table_id))) %>%
  select(4, 19)
colnames(team_ids)[1] <- 'Time'

# Again correction and standardization of team names
# Essential for full join
team_ids[team_ids == "America MG"] <- "América (MG)"
team_ids[team_ids == "Athletico Paranaense"] <- 'Athletico'
team_ids[team_ids == "Atletico MG"] <- 'Atlético Mineiro'
team_ids[team_ids == "Cuiaba"] <- 'Cuiabá'
team_ids[team_ids == "Goias"] <- 'Goiás'
team_ids[team_ids == "Gremio"] <- 'Grêmio'
team_ids[team_ids == "Red Bull Bragantino"] <- 'RB Bragantino'
team_ids[team_ids == "Santos FC"] <- 'Santos'
team_ids[team_ids == "Sao Paulo"] <- 'São Paulo'

classificacao_media <- full_join(classificacao_media, team_ids, by = 'Time') %>%
  relocate(image_link, .after = Pos)

Calculating table according to games played to date

table_today <- calcTAB(played_2023)
table_today <- table_today[, -ncol(table_today)]
table_today$Time <- as.character(table_today$Time)
table_today[table_today == 'Ath Paranaense'] <- 'Athletico'
table_today[table_today == 'Botafogo (RJ)'] <- 'Botafogo'
table_today[table_today == 'Bragantino'] <- 'RB Bragantino'
table_today <- full_join(table_today, team_ids, by = 'Time') %>%
  relocate(image_link, .after = Pos)

head(select(table_today, -2), n = 8)
## # A tibble: 8 × 10
##     Pos Time               PTS     J     V     E     D    GP    GS    SG
##   <int> <chr>            <dbl> <int> <int> <int> <int> <dbl> <dbl> <dbl>
## 1     1 Botafogo            52    25    16     4     5    40    16    24
## 2     2 RB Bragantino       45    25    12     9     4    35    22    13
## 3     3 Grêmio              44    25    13     5     7    40    32     8
## 4     4 Palmeiras           44    25    12     8     5    39    20    19
## 5     5 Flamengo            43    25    12     7     6    37    30     7
## 6     6 Fluminense          41    25    12     5     8    34    29     5
## 7     7 Atlético Mineiro    40    25    11     7     7    29    19    10
## 8     8 Athletico           40    25    11     7     7    37    29     8

Final plot of simulated standings

(
  sim <-
    classificacao_media %>%
    gt::gt()  |>
    ##logos
    gtExtras::gt_img_rows(column = image_link, height = 20) |>
    ##change column names
    gt::cols_label(image_link = "")  %>%
    ##apply 538 theme
    gtExtras::gt_theme_538()  %>%
    ##highlight rows for top 4/5/and bottom 3
    gtExtras::gt_highlight_rows(
      columns = everything(),
      rows = 1:4,
      fill = '#ACE1AF',
      font_weight = "normal"
    )  |>
    gtExtras::gt_highlight_rows(
      columns = everything(),
      rows = 5:6,
      fill = '#D0F0C0',
      font_weight = "normal"
    )  |>
    gtExtras::gt_highlight_rows(
      columns = everything(),
      rows = 7:12,
      fill = '#FFDEAD',
      font_weight = "normal"
    )  |>
    gtExtras::gt_highlight_rows(
      columns = everything(),
      rows = 17:20,
      fill = '#FFCCCC',
      font_weight = "normal"
    )  |>
    ##align text
    gt::cols_align("center")  |>
    gt::cols_align(align = 'left',
                   columns = Time)  |>
    gt::cols_width(Time ~ px(165))  |>
    gt::cols_width(PTS ~ px(35))  |>
    gt::cols_width(J ~ px(35))  |>
    gt::cols_width(V ~ px(35))  |>
    gt::cols_width(E ~ px(35))  |>
    gt::cols_width(D ~ px(35))  |>
    gt::cols_width(GP ~ px(35))  |>
    gt::cols_width(GS ~ px(35))  |>
    gt::cols_width(SG ~ px(35))  |>
    gt::cols_width(SG ~ px(35))  |>
    gt::tab_style(style = cell_text(weight = 'bold'),
                  locations  = cells_body(columns = c(PTS, Pos)))  |>
    ##format title and subtitle (including league logo)
    gt::tab_header(
      title = gt::md(
        glue::glue(
          "<img src='{league_logo}' style='height:60px;'><br>Brasileirão 2023"
        )
      ),
      subtitle = gt::md(glue::glue("Simulado em **{current_date}**"))
    ))

Brasileirão 2023
Simulado em 04-10-2023
Pos Time PTS J V E D GP GS SG
1 Botafogo 74 38 22 8 8 57 28 29
2 Palmeiras 69 38 19 11 8 60 30 30
3 Grêmio 64 38 19 8 11 61 49 12
4 Flamengo 64 38 18 10 10 56 45 11
5 RB Bragantino 64 38 17 12 8 52 37 15
6 Atlético Mineiro 61 38 17 11 10 46 30 15
7 Fluminense 60 38 17 8 13 52 45 7
8 Athletico 58 38 16 10 12 54 45 9
9 Fortaleza 57 38 16 10 12 44 37 7
10 São Paulo 51 38 13 11 14 46 43 3
11 Cuiabá 49 38 13 9 16 39 44 -5
12 Corinthians 49 38 12 13 13 45 46 0
13 Internacional 48 38 12 12 14 35 42 -7
14 Cruzeiro 47 38 11 13 13 35 33 2
15 Goiás 43 38 10 13 15 34 47 -13
16 Vasco da Gama 41 38 11 8 19 41 57 -16
17 Santos 41 38 11 9 18 41 61 -20
18 Bahia 40 38 10 11 17 42 51 -9
19 América (MG) 31 38 7 9 22 41 73 -32
20 Coritiba 28 38 7 8 23 40 78 -37

Final plot of today’s standings

(
  act <-
    table_today %>%
    gt::gt()  |>
    ##logos
    gtExtras::gt_img_rows(column = image_link, height = 20) |>
    ##change column names
    gt::cols_label(image_link = "")  %>%
    ##apply 538 theme
    gtExtras::gt_theme_538()  %>%
    ##highlight rows for top 4/5/and bottom 3
    gtExtras::gt_highlight_rows(
      columns = everything(),
      rows = 1:4,
      fill = '#ACE1AF',
      font_weight = "normal"
    )  |>
    gtExtras::gt_highlight_rows(
      columns = everything(),
      rows = 5:6,
      fill = '#D0F0C0',
      font_weight = "normal"
    )  |>
    gtExtras::gt_highlight_rows(
      columns = everything(),
      rows = 7:12,
      fill = '#FFDEAD',
      font_weight = "normal"
    )  |>
    gtExtras::gt_highlight_rows(
      columns = everything(),
      rows = 17:20,
      fill = '#FFCCCC',
      font_weight = "normal"
    )  |>
    ##align text
    gt::cols_align("center")  |>
    gt::cols_align(align = 'left',
                   columns = Time)  |>
    gt::cols_width(Time ~ px(165))  |>
    gt::cols_width(PTS ~ px(35))  |>
    gt::cols_width(J ~ px(35))  |>
    gt::cols_width(V ~ px(35))  |>
    gt::cols_width(E ~ px(35))  |>
    gt::cols_width(D ~ px(35))  |>
    gt::cols_width(GP ~ px(35))  |>
    gt::cols_width(GS ~ px(35))  |>
    gt::cols_width(SG ~ px(35))  |>
    gt::cols_width(SG ~ px(35))  |>
    gt::tab_style(style = cell_text(weight = 'bold'),
                  locations  = cells_body(columns = c(PTS, Pos)))  |>
    ##format title and subtitle (including league logo)
    gt::tab_header(
      title = gt::md(
        glue::glue(
          "<img src='{league_logo}' style='height:60px;'><br>Brasileirão 2023"
        )
      ),
      subtitle = gt::md(glue::glue("Classificação em **{current_date}**"))
    ))

Brasileirão 2023
Classificação em 04-10-2023
Pos Time PTS J V E D GP GS SG
1 Botafogo 52 25 16 4 5 40 16 24
2 RB Bragantino 45 25 12 9 4 35 22 13
3 Grêmio 44 25 13 5 7 40 32 8
4 Palmeiras 44 25 12 8 5 39 20 19
5 Flamengo 43 25 12 7 6 37 30 7
6 Fluminense 41 25 12 5 8 34 29 5
7 Atlético Mineiro 40 25 11 7 7 29 19 10
8 Athletico 40 25 11 7 7 37 29 8
9 Fortaleza 39 25 11 6 8 30 24 6
10 São Paulo 34 25 9 7 9 31 27 4
11 Cuiabá 32 25 9 5 11 26 30 -4
12 Cruzeiro 30 25 7 9 9 24 22 2
13 Corinthians 30 25 7 9 9 29 31 -2
14 Internacional 29 25 7 8 10 20 29 -9
15 Santos 27 25 7 6 12 27 41 -14
16 Goiás 27 25 6 9 10 21 31 -10
17 Vasco da Gama 26 25 7 5 13 27 38 -11
18 Bahia 25 25 6 7 12 28 34 -6
19 América (MG) 18 25 4 6 15 27 51 -24
20 Coritiba 17 25 4 5 16 27 53 -26
gt::gtsave(act, paste(folder, current_date, ' - Tabela HOJE.png', sep = ''), expand = 60)
gt::gtsave(sim, paste(folder, current_date, ' - Tabela FINAL.png', sep = ''), expand = 60)